library(Rwordseg)
library(tm)
library(jiebaR)
library(jiebaRD)
library(dplyr)
library(ggplot2)
library(readr)
library(stringr)
library(coefplot)
library(estimatr)

#------------------------------------------------------------------------------------------
## Method 2. ANTUSD Sentiment Score 
#------------------------------------------------------------------------------------------

# Step 2.1 Getting the Sentiment Scores

# ANTUSD sentiment dictionary. The file is read without a header row, so
# read.csv names its columns V1, V2, ...
ANTUSD <- read.csv("ANTUSD.csv", header = FALSE)
# Tokenization takes a long time, so its saved output files are loaded directly.
Tok_Mob <- read_csv("Tokenization_Mob.csv")
Tok_Pac <- read_csv("Tokenization_Pac.csv")

# The scoring below runs a long time. To use the saved output directly, skip
# ahead to the "Analyzing the ANTUSD Sentiment Scores" section.

# Second dictionary column (presumably the sentiment score of each entry --
# confirm against the ANTUSD file layout). Taken as a whole vector rather than
# copied element by element in a 1:length() loop.
ANTUSD_Score <- ANTUSD$V2

# Look up the score stored at every position listed in `row_index`.
#
# n_articles: number of leading rows of `row_index` to process.
# row_index:  matrix or data frame whose cells are positions in `scores`;
#             NA cells yield no score. Presumably one row per article and one
#             cell per token -- confirm against the code that builds Row_Mob /
#             Row_Pac.
# scores:     vector of scores, addressed by position.
#
# Returns an n_articles x ncol(row_index) numeric matrix in which every
# look-up that produced NA has been replaced by 0.
#
# The whole index matrix is resolved with one vectorized subscript instead of
# assigning data-frame cells one at a time, which was the slow part of this
# step.
lookup_scores <- function(n_articles, row_index, scores) {
  index <- as.matrix(row_index)[seq_len(n_articles), , drop = FALSE]
  found <- matrix(scores[as.vector(index)],
                  nrow = n_articles, ncol = ncol(index))
  found[is.na(found)] <- 0
  found
}

# Mobilization campaigns: per-article total score, saved with date and campaign.
Score_Mob <- lookup_scores(length(Mob), Row_Mob, ANTUSD_Score)

ANTUSD_Score_Mob <- data.frame(ANTUSD_Score = rowSums(Score_Mob))
ANTUSD_Score_Mob$Date <- Mob_Date
ANTUSD_Score_Mob$Campaigns <- Mob_Campaigns

write.csv(ANTUSD_Score_Mob, file = "ANTUSD_Score_Mob.csv")

# Pacification campaigns: same procedure.
Score_Pac <- lookup_scores(length(Pac), Row_Pac, ANTUSD_Score)

ANTUSD_Score_Pac <- data.frame(ANTUSD_Score = rowSums(Score_Pac))
ANTUSD_Score_Pac$Date <- Pac_Date
ANTUSD_Score_Pac$Campaigns <- Pac_Campaigns

write.csv(ANTUSD_Score_Pac, file = "ANTUSD_Score_Pac.csv")

# Analyzing the ANTUSD Sentiment Scores

# Preparing the data: read the saved score files and tag each one with its
# campaign type before stacking them into a single table.
ANTUSD_Mob <- read_csv("ANTUSD_Score_Mob.csv") %>%
  mutate(Campaign_Type = "Mobilization Campaigns")

ANTUSD_Pac <- read_csv("ANTUSD_Score_Pac.csv") %>%
  mutate(Campaign_Type = "Pacification Campaigns")

ANTUSD_Scores_Combined <- rbind(ANTUSD_Mob, ANTUSD_Pac)

# ==================== Table 7.6 ===============================================
# Mean sentiment score of each campaign type.
ANTU_Scores_Mob_mean <- ANTUSD_Mob %>%
  summarise(mob_average = mean(ANTUSD_Score))

ANTU_Scores_Pac_mean <- ANTUSD_Pac %>%
  summarise(pac_average = mean(ANTUSD_Score))

# One-row comparison table: one column per campaign type.
mean_data <- matrix(
  c(ANTU_Scores_Mob_mean$mob_average, ANTU_Scores_Pac_mean$pac_average),
  nrow = 1,
  dimnames = list("Sentiment Scores",
                  c("Mobilization Campaign", "Pacification Campaign"))
)

# Emit LaTeX source for the table; paste it into a LaTeX document to render.
# (xtable(mean_data) would give an equivalent table.)
# stargazer is not attached by this file's library() calls, so the call is
# namespace-qualified.
stargazer::stargazer(mean_data, type = "latex",
                     title = "Average of sentiment scores")

# Two-sample t-test on the article scores: pacification vs. mobilization.
# t.test() does not assume equal variances by default (Welch test).
t.test(x = ANTUSD_Pac$ANTUSD_Score, y = ANTUSD_Mob$ANTUSD_Score)

# One-way test of score by campaign type (also without the equal-variance
# assumption by default); it gives the same result as the t-test above.
oneway.test(formula = ANTUSD_Score ~ Campaign_Type,
            data = ANTUSD_Scores_Combined)

# ==================== Figure 7.6 =====================================================
# Rename the five columns by position. Later sections refer to these names,
# so the backticked `Sentiment Score` / `Campaign Type` columns start here.
names(ANTUSD_Scores_Combined) <- c(
  "Article", "Sentiment Score", "Date", "Campaigns", "Campaign Type"
)

# Per campaign type: intercept-only robust regression, i.e. the group mean
# together with its robust confidence interval (conf.low / conf.high).
# The estimate is copied into `Sentiment Score` so the summary rows can reuse
# the plot's y aesthetic.
summary_Y1 <- ANTUSD_Scores_Combined %>%
  group_by(`Campaign Type`) %>%
  do(tidy(lm_robust(`Sentiment Score` ~ 1, data = .))) %>%
  mutate(`Sentiment Score` = estimate)

# Jittered grey article scores in the background; group means with their
# confidence intervals drawn on top in the campaign colours.
ggplot(
  ANTUSD_Scores_Combined,
  aes(x = `Campaign Type`, y = `Sentiment Score`, colour = `Campaign Type`)
) +
  geom_point(
    colour = "azure4", alpha = 0.2, stroke = 0,
    position = position_jitter(width = 0.25, height = 0.25)
  ) +
  geom_point(data = summary_Y1) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    data = summary_Y1, width = 0
  ) +
  scale_color_manual(values = c("indianred", "seagreen")) +
  ylim(-3, 50) +
  labs(
    title = "Sentiment Score of Mobilization and Pacification Campaigns - ANTUSD",
    y = ""
  ) +
  theme(
    legend.position = "right",
    axis.text.x = element_text(face = "bold"),
    axis.title.x = element_blank(),
    strip.background = element_blank()
  )

# ==================== Figure 7.7 =====================================================
# Confidence-interval widths, in standard errors, handed to coefplot():
# an interval that excludes zero implies two-sided significance at the
# stated level.
innerCI.n <- 1.64 # 0.1 level
outerCI.n <- 1.96 # 0.05 level

# No-intercept regression on campaign type, so each coefficient is the mean
# score of one campaign type; newNames strips the variable-name prefix from
# the axis labels.
coefplot(
  lm(`Sentiment Score` ~ `Campaign Type` - 1, data = ANTUSD_Scores_Combined),
  title = "Coefficient of Mobilization and Pacification Campaigns",
  newNames = c(
    "`Campaign Type`Pacification Campaigns" = "Pacification Campaigns",
    "`Campaign Type`Mobilization Campaigns" = "Mobilization Campaigns"
  ),
  intercept = FALSE,
  innerCI = innerCI.n,
  outerCI = outerCI.n,
  lwdOuter = 0.5,
  color = "black"
) +
  theme_bw()

# ==================== Figure 7.8 =====================================================
# Campaign id -> axis label, listed in the order the campaigns should appear.
labels_boxplot <- c(
  India_1962 = "India 1962",
  Soviet_1969 = "Soviet 1969",
  Vietnam_1974 = "Vietnam 1974",
  Vietnam_1979 = "Vietnam 1979",
  Japan_1990 = "Japan 1990",
  Japan_1996 = "Japan 1996",
  Japan_2005 = "Japan 2005",
  Japan_2010 = "Japan 2010",
  Japan_2012 = "Japan 2012",
  Philippines_2016 = "Philippines 2016",
  India_2017 = "India 2017"
)

# Campaigns becomes an ordered factor (for this plot only) whose levels are
# the reverse of the listing above, since the y axis is drawn bottom-up.
# ANTUSD_Scores_Combined itself is left unchanged.
boxplot <- ANTUSD_Scores_Combined %>%
  mutate(
    Campaigns = factor(Campaigns,
                       levels = rev(names(labels_boxplot)),
                       ordered = TRUE)
  ) %>%
  ggplot(aes(x = `Sentiment Score`, y = Campaigns, colour = `Campaign Type`)) +
  geom_jitter(
    position = position_jitter(0.21),
    color = "steelblue", alpha = 0.15, shape = 15
  ) +
  geom_boxplot() +
  geom_vline(aes(xintercept = 0), colour = "black", linetype = "dashed") +
  scale_y_discrete(labels = labels_boxplot) +
  scale_color_manual(values = c("indianred", "seagreen")) +
  xlim(-50, 100) +
  ylab("Media Campaigns")

boxplot
# Note: each box shows the median and interquartile range; with geom_boxplot()
# defaults the whiskers reach the most extreme points within 1.5 x IQR of the
# box and anything beyond is drawn as an outlier. The jittered squares are the
# individual articles.

# ==================== Figure 7.9 =====================================================
# Campaign ids in display order; the axis label is the id with its
# underscore replaced by a space.
campaign_ids <- c(
  "India_1962", "Soviet_1969", "Vietnam_1974", "Vietnam_1979",
  "Japan_1990", "Japan_1996", "Japan_2005", "Japan_2010", "Japan_2012",
  "Philippines_2016", "India_2017"
)
campaign_labels <- sub("_", " ", campaign_ids, fixed = TRUE)

# No-intercept regression on campaign, so each coefficient is that
# campaign's mean score.
model <- lm(`Sentiment Score` ~ Campaigns - 1, data = ANTUSD_Scores_Combined)

# lm() names its coefficients "Campaigns<id>"; map those names to the labels.
coef_graph <- coefplot(
  model,
  newNames = setNames(campaign_labels, paste0("Campaigns", campaign_ids)),
  intercept = FALSE,
  innerCI = innerCI.n,
  outerCI = outerCI.n,
  lwdOuter = 0.5,
  color = "black"
) +
  theme_bw() +
  labs(
    title = "Estimated Effect of Campaign on ANTUSD Sentiment Score",
    x = "Sentiment Score"
  )

# coefplot builds its plotting data internally, so the coefficient order is
# set afterwards: levels reversed because the axis is drawn bottom-up.
coef_graph$data$Coefficient <- factor(coef_graph$data$Coefficient,
                                      levels = rev(campaign_labels))
coef_graph
